Contents

import numpy as np 
import pandas as pd 
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
import plotly.express as px
init_notebook_mode(connected=True)
import plotly.graph_objs as go
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Build a 50x4 table of standard-normal samples with columns A-D.
arr_1 = np.random.standard_normal(size=(50, 4))
df_1 = pd.DataFrame(data=arr_1, columns=list("ABCD"))
# Preview the first five rows.
df_1.head(n=5)
A B C D
0 0.494661 -0.047473 -0.402051 0.236956
1 -0.336509 -0.640901 -0.142603 1.366507
2 0.475715 -0.375774 -1.528094 -1.870601
3 -0.929313 -0.909902 -0.963260 -1.056131
4 0.610604 -1.110729 -0.371158 -0.746082
# Line chart of df_1 (line is the default kind used by DataFrame.plot()).
df_1.plot.line()
<Axes: >
_images/a87be64b7b0b66f6034e42d8746b8345cd7b1dd87abda0f12c3298ccf25599cb.png
# Load Plotly's bundled stock sample data and keep a CSV copy of it.
df_stocks = px.data.stocks()
df_stocks.head()
df_stocks.to_csv('all_stocks.csv', index=False)
# Plot the AMZN column against the date column. `labels` is keyed by column
# name, so the axis titles are only renamed when the keys are 'date' and
# 'AMZN'; generic 'x'/'y' keys match no column and leave the titles unchanged.
px.line(df_stocks, x='date', y='AMZN', labels={'date': 'Date', 'AMZN': 'Price'})
# Multiple line plot
df_stocks = px.data.stocks()
# Passing a list of y columns puts Plotly Express in wide-form mode: the
# y axis is then labelled through the 'value' key, and the x axis through
# its column name ('date'). Keys 'x'/'y' would not rename anything.
px.line(df_stocks, x='date', y=['GOOG', 'AAPL'],
        labels={'date': 'Date', 'value': 'Price'}, title='Apple vs.Google')
# Build the stock chart trace by trace with graph objects.
fig = go.Figure()
# Apple is a plain line; the other three also draw a marker at every sample.
fig.add_trace(go.Scatter(x=df_stocks.date, y=df_stocks.AAPL, mode='lines', name='Apple'))
fig.add_trace(go.Scatter(x=df_stocks.date, y=df_stocks.AMZN, mode='lines+markers', name='Amazon'))
fig.add_trace(go.Scatter(x=df_stocks.date, y=df_stocks.FB, mode='lines+markers', name='Facebook'))
# Google additionally gets a custom red dash-dot line.
fig.add_trace(go.Scatter(x=df_stocks.date, y=df_stocks.GOOG, mode='lines+markers', name='Google',
                         line=dict(color='firebrick', width=2, dash='dashdot')))
# Dates are on the x axis and prices on the y axis, so title them accordingly.
fig.update_layout(title='Stock Price data', xaxis_title='Date', yaxis_title='Price')
# Axis, margin and background styling.
fig.update_layout(
    xaxis=dict(
        showline=True, showgrid=False, showticklabels=True,
        linecolor='rgb(204,204,204)',
        linewidth=2, ticks='outside',
        tickfont=dict(family='Arial', size=12, color='rgb(82,82,82)'),
    ),
    # The y axis is drawn without grid, zero line, axis line or tick labels.
    yaxis=dict(showgrid=False, zeroline=False, showline=False, showticklabels=False),
    autosize=False,
    margin=dict(autoexpand=False, l=100, r=100, t=100),
    showlegend=False, plot_bgcolor='white')
# Bar chart of Pakistan's population for each year in the gapminder data.
df_us = px.data.gapminder().loc[lambda frame: frame['country'] == 'Pakistan']
print(df_us)
df_us.to_csv('df_us.csv', index=False)
px.bar(data_frame=df_us, x='year', y='pop')
       country continent  year  lifeExp        pop    gdpPercap iso_alpha  \
1164  Pakistan      Asia  1952   43.436   41346560   684.597144       PAK   
1165  Pakistan      Asia  1957   45.557   46679944   747.083529       PAK   
1166  Pakistan      Asia  1962   47.670   53100671   803.342742       PAK   
1167  Pakistan      Asia  1967   49.800   60641899   942.408259       PAK   
1168  Pakistan      Asia  1972   51.929   69325921  1049.938981       PAK   
1169  Pakistan      Asia  1977   54.043   78152686  1175.921193       PAK   
1170  Pakistan      Asia  1982   56.158   91462088  1443.429832       PAK   
1171  Pakistan      Asia  1987   58.245  105186881  1704.686583       PAK   
1172  Pakistan      Asia  1992   60.838  120065004  1971.829464       PAK   
1173  Pakistan      Asia  1997   61.818  135564834  2049.350521       PAK   
1174  Pakistan      Asia  2002   63.610  153403524  2092.712441       PAK   
1175  Pakistan      Asia  2007   65.483  169270617  2605.947580       PAK   

      iso_num  
1164      586  
1165      586  
1166      586  
1167      586  
1168      586  
1169      586  
1170      586  
1171      586  
1172      586  
1173      586  
1174      586  
1175      586  
# Stacked bars: tips per day, colored by the customer's sex, with custom
# title and axis labels.
df_tips = px.data.tips()
df_tips.to_csv('df_tips.csv', index=False)
px.bar(
    df_tips,
    x='day',
    y='tip',
    color='sex',
    title='Tips by Sex on Each Day',
    labels=dict(tip='Tip Amount', day='Day of the Week'),
)
# Grouped bars: total bill per sex, with smokers and non-smokers side by side.
px.bar(df_tips, x='sex', y='total_bill', color='smoker', barmode='group')
# Display pop data for countries in Asia in 2007 greater than 2000000
df_asia = px.data.gapminder().query("continent == 'Asia' and year == 2007 and pop > 2.e6")
df_asia.to_csv("df_asia.csv", index=False)
fig = px.bar(df_asia, y='pop', x='country', text='pop', color='country')
# Put the bar total above each bar, formatted to 2 significant digits
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
# uniformtext_minsize is only honoured when uniformtext_mode is set as well;
# 'hide' drops any label that would have to shrink below size 8 to fit
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
# Rotate labels 45 degrees
fig.update_layout(xaxis_tickangle=-45)
# Load the Iris measurements from a local CSV file and preview the first rows.
df_iris = pd.read_csv('Iris.csv')
df_iris.head(n=5)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
3 4 4.6 3.1 1.5 0.2 Iris-setosa
4 5 5.0 3.6 1.4 0.2 Iris-setosa
# Sepal width vs. sepal length: one color per species, marker size taken from
# petal length, and petal width added to the hover box.
px.scatter(
    df_iris,
    x='SepalWidthCm',
    y='SepalLengthCm',
    color='Species',
    size='PetalLengthCm',
    hover_data=['PetalWidthCm'],
)
# Same scatter built with graph objects: markers are colored by sepal width,
# a color scale is shown next to the plot, and the species is the hover text.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_iris['SepalWidthCm'],
        y=df_iris['SepalLengthCm'],
        mode='markers',
        text=df_iris['Species'],
        marker=dict(color=df_iris['SepalWidthCm'], showscale=True),
    )
)
# Enlarge the markers and give each a 2px outline.
fig.update_traces(marker_size=10, marker_line_width=2)
# For large point counts use Scattergl instead of Scatter.
# 100000 random points, colored by a third random sample on the Viridis scale.
fig = go.Figure()
fig.add_trace(
    go.Scattergl(
        x=np.random.randn(100000),
        y=np.random.randn(100000),
        mode='markers',
        marker={
            'color': np.random.randn(100000),
            'colorscale': 'Viridis',
            'line': {'width': 1},
        },
    )
)
fig
# Pie chart of the 2007 population of the Asian countries in gapminder
# Color maps here plotly.com/python/builtin-colorscales/
df_samer = px.data.gapminder().query("year == 2007 and continent == 'Asia'")
df_samer.to_csv('df_samer.csv', index=False)
px.pie(
    df_samer,
    names='country',
    values='pop',
    title='Population of Asian continent',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
# Pie chart with hand-picked slice colors
colors = ['blue', 'green', 'black', 'purple', 'red', 'brown']
fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=['Water', 'Grass', 'Normal', 'Psychic', 'Fire', 'Ground'],
        values=[110, 90, 80, 80, 70, 60],
    )
)
# Hover and slice text both show label + percent; the first and third slices
# are pulled out of the pie; every slice gets a white 2px border
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='label+percent',
    textfont_size=20,
    pull=[0.1, 0, 0.2, 0, 0, 0],
    marker={'colors': colors, 'line': {'color': '#FFFFFF', 'width': 2}},
)
# Simulate 5000 rolls of two six-sided dice (randint's upper bound 7 is
# exclusive) and add each pair together
dice_1 = np.random.randint(1, 7, 5000)
dice_2 = np.random.randint(1, 7, 5000)
dice_sum = dice_1 + dice_2
# bins represent the number of bars to make
# Can define x label, color, title
# marginal creates another plot (violin, box, rug)
fig = px.histogram(dice_sum, nbins=11, labels={'value': 'Dice Roll'},
                   title='5000 Dice Roll Histogram', marginal='violin',
                   color_discrete_sequence=['blue'])
# The x axis holds the rolled totals; the y axis of a histogram is how often
# each total occurred, so it is titled as a count rather than as the sum
fig.update_layout(
    xaxis_title_text='Dice Roll',
    yaxis_title_text='Count',
    bargap=0.2, showlegend=False
)
# Histogram of total bills with one color-coded series per sex
df_tips = px.data.tips()
px.histogram(data_frame=df_tips, x='total_bill', color='sex')
# Box plots compare distributions: the box spans the quartiles, the line
# inside it is the median, and the whiskers cover the remaining points that
# are not treated as outliers
df_tips = px.data.tips()
# Tips by sex; points='all' also draws every individual observation
px.box(data_frame=df_tips, x='sex', y='tip', points='all')
# Tips per day, with a separate box for each sex
px.box(data_frame=df_tips, x='day', y='tip', color='sex')
# Box plot of tips by sex that also marks the mean and standard deviation
fig = go.Figure()
fig.add_trace(
    go.Box(
        x=df_tips['sex'],
        y=df_tips['tip'],
        boxmean='sd',
        marker_color='blue',
    )
)
# Heavier styling: Google and Apple price distributions side by side
df_stocks = px.data.stocks()
fig = go.Figure()
# Each box draws all of its points with jitter so they don't overlap,
# and uses a narrower whisker cap
fig.add_trace(
    go.Box(name='Google', y=df_stocks['GOOG'], fillcolor='blue',
           boxpoints='all', jitter=0.5, whiskerwidth=0.2)
)
fig.add_trace(
    go.Box(name='Apple', y=df_stocks['AAPL'], fillcolor='red',
           boxpoints='all', jitter=0.5, whiskerwidth=0.2)
)
# Light grey paper and plot background with thick white horizontal grid lines
fig.update_layout(
    title='Google vs. Apple',
    yaxis={'gridcolor': 'rgb(255, 255, 255)', 'gridwidth': 3},
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)
# A violin plot combines a box plot with a kernel density estimate (KDE):
# the box summarizes the data points, the outline shows their estimated density
df_tips = px.data.tips()
px.violin(data_frame=df_tips, y='total_bill', box=True, points='all')
# Several violins at once: tips by smoker status, split by sex, with every
# column of the table available on hover
px.violin(
    df_tips,
    x='smoker',
    y='tip',
    color='sex',
    box=True,
    points='all',
    hover_data=df_tips.columns,
)
# Split violin: for every day the left half shows the total bills of smokers
# and the right half those of non-smokers
fig = go.Figure()
# Smokers: drawn on the negative (left) side in blue
fig.add_trace(go.Violin(x=df_tips['day'][ df_tips['smoker'] == 'Yes' ],
                        y=df_tips['total_bill'][ df_tips['smoker'] == 'Yes' ],
                        legendgroup='Yes', scalegroup='Yes', name='Yes',
                        side='negative',
                        line_color='blue'))
# Non-smokers: drawn on the positive (right) side in red. The trace gets its
# own 'No' legend/scale group so it no longer shares the smokers' 'Yes' group
# (which tied its legend toggle and scaling to the other trace)
fig.add_trace(go.Violin(x=df_tips['day'][ df_tips['smoker'] == 'No' ],
                        y=df_tips['total_bill'][ df_tips['smoker'] == 'No' ],
                        legendgroup='No', scalegroup='No', name='No',
                        side='positive',
                        line_color='red'))
# Load seaborn's "flights" example dataset (used for the heatmaps), keep a CSV
# copy of it, and display the table
flights = sns.load_dataset('flights')
flights.to_csv('flights.csv', index=False)
flights
year month passengers
0 1949 Jan 112
1 1949 Feb 118
2 1949 Mar 132
3 1949 Apr 129
4 1949 May 121
... ... ... ...
139 1960 Aug 606
140 1960 Sep 508
141 1960 Oct 461
142 1960 Nov 390
143 1960 Dec 432

144 rows × 3 columns

# Heatmap of passengers by year and month on the Viridis scale
# (bin counts can be set with nbinsx and nbinsy)
fig = px.density_heatmap(
    flights,
    x='year',
    y='month',
    z='passengers',
    color_continuous_scale='Viridis',
)
fig
# The same heatmap with a histogram attached along each axis
fig = px.density_heatmap(
    flights,
    x='year',
    y='month',
    z='passengers',
    marginal_x='histogram',
    marginal_y='histogram',
)
fig
# 3D scatter of the flight data: year, month and passengers on the three axes,
# colored by year, slightly transparent, in an 800x400 figure
fig = px.scatter_3d(
    flights,
    x='year',
    y='month',
    z='passengers',
    color='year',
    opacity=0.7,
    width=800,
    height=400,
)
fig
# The same three columns drawn as 3D lines, one color per year
fig = px.line_3d(
    flights,
    x='year',
    y='month',
    z='passengers',
    color='year',
)
fig
# A scatter matrix plots the columns against each other pairwise;
# points are colored by month
fig = px.scatter_matrix(data_frame=flights, color='month')
fig
# Maps: bubble map of the 2007 gapminder rows on a globe
# plotly.com/python-api-reference/generated/plotly.express.scatter_geo.html
df = px.data.gapminder().query("year == 2007")
df.to_csv('df.csv', index=False)
fig = px.scatter_geo(
    df,
    locations='iso_alpha',       # column holding each marker's location code
    color='continent',           # column that sets the marker color
    hover_name='country',        # column shown as the hover title
    size='pop',                  # column that sets the marker size
    projection='orthographic',
)
fig
# Choropleth: color US counties by their unemployment value

# urlopen fetches a resource from a URL
from urllib.request import urlopen
# json decodes the downloaded GeoJSON
import json

# Download the US county geometry
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.loads(response.read())

# Download the unemployment table; the county FIPS code is read as a string
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
    dtype={"fips": str},
)

# Match rows to county shapes through 'fips' and color by 'unemp',
# with the color range fixed to 0-12
fig = px.choropleth(
    df,
    geojson=counties,
    locations='fips',
    color='unemp',
    color_continuous_scale='Viridis',
    range_color=(0, 12),
    scope='usa',
    labels=dict(unemp='unemployment rate'),
)
fig
# Polar chart of the wind sample data: frequency is the radius, direction the
# angle; color and marker symbol follow strength, marker size follows frequency
df_wind = px.data.wind()
df_wind.to_csv('df_wind.csv', index=False)
px.scatter_polar(
    df_wind,
    r='frequency',
    theta='direction',
    color='strength',
    size='frequency',
    symbol='strength',
)
# Ternary scatter of the three experiment columns, colored by gender,
# with the group as the hover title
df_exp = px.data.experiment()
df_exp.to_csv('df_exp.csv', index=False)
px.scatter_ternary(
    df_exp,
    a='experiment_1',
    b='experiment_2',
    c='experiment_3',
    hover_name='group',
    color='gender',
)
# This dataframe provides scores for different students based on the level
# of attention they could provide during testing
att_df = sns.load_dataset("attention")
att_df.to_csv("att_df.csv", index=False)
# One small line chart per subject, wrapped after every 5 facets
fig = px.line(att_df, x='solutions', y='score', facet_col='subject',
              facet_col_wrap=5, title='Scores Based on Attention')
# An assignment alone renders nothing in a notebook; evaluate the figure so
# it is displayed
fig
# Load the full gapminder table and keep a CSV copy of it
# (presumably the input for an animated GDP / life-expectancy plot cycling
# through continents - the plotting code itself is not visible here)
df_cnt = px.data.gapminder()
df_cnt.to_csv('df_cnt.csv', index=False)

#